Setup

Load R libraries

library(tidyr)    # (Wickham & Henry, 2018)
library(ggplot2)  # (Wickham, 2009)
library(plyr)     # (Wickham, 2011)
library(dplyr)    # (Wickham et al., 2018)
library(cowplot)  # (Wilke, 2018)

Load data

Extract most performant organisms

extract_max_fit <- function(data_path) {
  data <- read.csv(data_path, na.strings="NONE")
  data$matchbin_metric <- factor(data$matchbin_metric,
                                  levels=c("hamming", 
                                           "integer", 
                                            "integer-symmetric", 
                                            "hash", 
                                            "streak", 
                                            "streak-exact"))
  data$tag_mut_rate <- factor(as.numeric(data$MUT_RATE__FUNC_TAG_BF))
  return(data)
}

exp_name <- "2020-03-23"
out_dir <- "2020-03-23-step-2"
dst_data_loc <- paste("../data/",exp_name,"/step-two/dir-sig/max_fit_orgs.csv", sep="")
dst_data <- extract_max_fit(dst_data_loc)
cst_data_loc <- paste("../data/",exp_name,"/step-two/chg-env/max_fit_orgs.csv", sep="")
cst_data <- extract_max_fit(cst_data_loc)

num_replicates <- 200

Extract fitness over time

dst_fot_data_loc <- paste("../data/", exp_name, "/step-two/dir-sig/fot.csv", sep="")
dst_fot_data <- read.csv(dst_fot_data_loc, na.strings="NONE")
dst_fot_data$tag_metric <- factor(dst_fot_data$tag_metric,
                                    levels=c("hamming", 
                                             "integer", 
                                             "integer-symmetric", 
                                             "hash", 
                                             "streak", 
                                             "streak-exact"))
dst_fot_data$tag_mut_rate <- factor(as.numeric(dst_fot_data$tag_mut_rate))

cst_fot_data_loc <- paste("../data/",exp_name,"/step-two/chg-env/fot.csv", sep="")
cst_fot_data <- read.csv(cst_fot_data_loc, na.strings="NONE")
cst_fot_data$matchbin_metric <- factor(cst_fot_data$matchbin_metric,
                                    levels=c("hamming",
                                             "integer",
                                             "integer-symmetric",
                                             "hash",
                                             "streak",
                                             "streak-exact"))
cst_fot_data$tag_mut_rate <- factor(as.numeric(cst_fot_data$MUT_RATE__FUNC_TAG_BF))

Directional Signal Task

Solutions x metric

generation_cutoffs <- c(500, 1000, 3000, 5000)
exp_prefix <- "dst"
for (gen in generation_cutoffs) {
  p <- ggplot(filter(dst_data, solution=="1" & update <= gen), aes(x=matchbin_metric, fill=matchbin_metric)) +
    geom_bar(stat="count", position="dodge") +
    geom_text(stat="count", 
            mapping=aes(label=..count..), 
            position=position_dodge(0.9), vjust=0) +
    ggtitle(paste("Directional signal task - solutions\n","<= ", gen, sep="")) +
    ylim(0, num_replicates+2) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/",out_dir,"/", exp_prefix, "-solutions-",gen,".png",sep=""), width=16, height=8)
  print(p)
}

Compare each metric’s successes using Fisher’s exact.

do_ft <- function(metric_a, metric_b, n) {
  a_successes <- 
    nrow(filter(dst_data, matchbin_metric==metric_a & solution=="1"))
  b_successes <- 
    nrow(filter(dst_data, matchbin_metric==metric_b & solution=="1"))
  table <-
    matrix(c(a_successes, 
             b_successes,
             n-a_successes,
             n-b_successes),
           nrow=2)
  rownames(table) <- c(metric_a, metric_b)
  colnames(table) <- c("success", "fail")
  ft <- fisher.test(table)
  return(ft)
}

# hamming_integer
hamming_integer_ft <- 
  do_ft("hamming", "integer", num_replicates)
# hamming_integer_sym
hamming_integer_sym_ft <- 
  do_ft("hamming", "integer-symmetric", num_replicates)
# hamming_hash
hamming_hash_ft <- 
  do_ft("hamming", "hash", num_replicates)
# hamming_streak
hamming_streak_ft <- 
  do_ft("hamming", "streak", num_replicates)
# streak_integer
streak_integer_ft <- 
  do_ft("streak", "integer", num_replicates)
# streak_integer_sym
streak_integer_sym_ft <- 
  do_ft("streak", "integer-symmetric", num_replicates)
# streak_hash
streak_hash_ft <- 
  do_ft("streak", "hash", num_replicates)
# hash_integer
hash_integer_ft <- 
  do_ft("hash", "integer", num_replicates)
# hash_integer_sym
hash_integer_sym_ft <- 
  do_ft("hash", "integer-symmetric", num_replicates)
# integer_integer_sym
integer_integer_sym_ft <- 
  do_ft("integer", "integer-symmetric", num_replicates)

p_values <- list(hamming_v_integer = hamming_integer_ft$p.value,
                 hamming_v_integer_sym = hamming_integer_sym_ft$p.value,
                 hamming_v_hash = hamming_hash_ft$p.value,
                 hamming_v_streak = hamming_streak_ft$p.value,
                 streak_v_integer = streak_integer_ft$p.value,
                 streak_v_integer_sym = streak_integer_sym_ft$p.value,
                 streak_v_hash = streak_hash_ft$p.value,
                 hash_v_integer = hash_integer_ft$p.value,
                 hash_v_integer_sym = hash_integer_sym_ft$p.value,
                 integer_v_integer_sym = integer_integer_sym_ft$p.value)
adjusted <- p.adjust(p_values,
                     method="bonferroni")

for (key in names(adjusted)) {
  print(paste("Comparison: ", key, sep=""))
  adjusted_p <- adjusted[[key]]
  print(paste("  adjusted p value: ", adjusted_p, sep=""))
  if (adjusted_p < 0.05) { print("  *significant") }
}
## [1] "Comparison: hamming_v_integer"
## [1] "  adjusted p value: 3.77150637252631e-15"
## [1] "  *significant"
## [1] "Comparison: hamming_v_integer_sym"
## [1] "  adjusted p value: 1.2535415887387e-17"
## [1] "  *significant"
## [1] "Comparison: hamming_v_hash"
## [1] "  adjusted p value: 0.00269157528710225"
## [1] "  *significant"
## [1] "Comparison: hamming_v_streak"
## [1] "  adjusted p value: 0.620977586899191"
## [1] "Comparison: streak_v_integer"
## [1] "  adjusted p value: 1.33179573482674e-22"
## [1] "  *significant"
## [1] "Comparison: streak_v_integer_sym"
## [1] "  adjusted p value: 1.51925638619314e-25"
## [1] "  *significant"
## [1] "Comparison: streak_v_hash"
## [1] "  adjusted p value: 2.33108697434928e-07"
## [1] "  *significant"
## [1] "Comparison: hash_v_integer"
## [1] "  adjusted p value: 7.13236630604939e-05"
## [1] "  *significant"
## [1] "Comparison: hash_v_integer_sym"
## [1] "  adjusted p value: 2.26535367609043e-06"
## [1] "  *significant"
## [1] "Comparison: integer_v_integer_sym"
## [1] "  adjusted p value: 1"

Scores x metric

exp_prefix <- "dst"
p <- ggplot(dst_data, aes(x=matchbin_metric, y=aggregate_score, fill=matchbin_metric)) +
    geom_boxplot() +
    ggtitle(paste("Directional signal task - scores", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/",out_dir,"/", exp_prefix, "-scores.png",sep=""), width=16, height=8)
print(p)

Time to solution x metric

exp_prefix <- "dst"
p <- ggplot(dst_data, aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
    geom_boxplot() +
    geom_jitter() +
    ggtitle(paste("Directional signal task - time to solution", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/",out_dir,"/", exp_prefix, "-time-to-solution.png",sep=""), width=16, height=8)
print(p)

ggplot(dst_data, aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
    # geom_boxplot() +
    geom_jitter(alpha=0.75) +
    stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
    stat_summary(fun.y = mean, geom = "point", colour = "black") +
    ggtitle(paste("Directional signal task - time to solution", sep="")) +
    theme(axis.text.x=element_text(size=8))

metrics <- c("hamming", "hash", "integer", "integer-symmetric", "streak")
dst_update_rankings <- data.frame(matchbin_metric=character(), 
                                  aggregate_score=numeric(), 
                                  update=numeric(),
                                  update_rank=numeric(),
                                  solution=numeric())
for (metric in metrics) {
  dst_metric <- 
    filter(dst_data, matchbin_metric==metric)
  dst_metric <- 
    subset.data.frame(dst_metric, select=c("aggregate_score", 
                                           "update", 
                                           "matchbin_metric",
                                           "solution"))
  dst_metric$update <- as.numeric(dst_metric$update)
  dst_metric$update_ranking <- rank(dst_metric$update, ties.method="random")
  dst_update_rankings <- rbind(dst_update_rankings, dst_metric)
}

exp_prefix <- "dst"
ggplot(filter(dst_update_rankings, update_ranking <= 40), aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
    geom_jitter(aes(color=matchbin_metric)) +
    geom_boxplot(alpha=0.75) +
    ggtitle(paste("Directional signal task - time to solution (first 40)", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/", out_dir,"/", exp_prefix, "-time-to-solution-top40-bp.png", sep=""))
## Saving 7 x 5 in image

ggplot(filter(dst_update_rankings, update_ranking <= 40), aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
    geom_jitter(alpha=0.75) +
    stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
    stat_summary(fun.y = mean, geom = "point", colour = "black") +
    ggtitle(paste("Directional signal task - time to solution", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/", out_dir,"/", exp_prefix, "-time-to-solution-top40-ci.png", sep=""))
## Saving 7 x 5 in image

dst_first_40_solutions <- filter(dst_update_rankings, update_ranking <= 40)
kruskal.test(formula = update ~ matchbin_metric, data=dst_first_40_solutions)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  update by matchbin_metric
## Kruskal-Wallis chi-squared = 131.58, df = 4, p-value < 2.2e-16
pairwise.wilcox.test(x=dst_first_40_solutions$update,
                     g=dst_first_40_solutions$matchbin_metric,
                     p.adjust.method = "bonferroni",
                     exact=FALSE,
                     conf.int=TRUE)
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  dst_first_40_solutions$update and dst_first_40_solutions$matchbin_metric 
## 
##                   hamming integer integer-symmetric hash   
## integer           3.3e-11 -       -                 -      
## integer-symmetric 2.0e-11 1.00000 -                 -      
## hash              3.8e-06 0.00093 7.2e-06           -      
## streak            6.0e-06 4.3e-13 1.3e-12           4.8e-10
## 
## P value adjustment method: bonferroni

Fitness over time x metric

updates <- c(10, 30, 50, 100, 300, 500, 1000, 3000, 5000)
plot_data <- filter(dst_fot_data, update %in% updates)
plot_data$update <- factor(plot_data$update)

ggplot(plot_data, aes(x=update, y=score, fill=tag_metric)) +
  geom_boxplot() +
  ggtitle("DST - score over time") +
  ggsave(paste("./imgs/",out_dir,"/dst_score_over_time_box.pdf", sep=""), width=21, height=8)

ggplot(filter(dst_fot_data), aes(x=update, y=score, color=tag_metric, fill=tag_metric)) +
  stat_summary(geom = "line", fun.y = mean) +
  stat_summary(geom = "ribbon", fun.data = mean_cl_boot, fun.args=list(conf.int=0.95), alpha = 0.3, color=NA) +
  ggtitle("DST - score over time") +
  ggsave(paste("./imgs/",out_dir,"/dst_score_over_time.pdf", sep=""), width=21, height=10)

Changing Signal Task

Solutions x metric

generation_cutoffs <- c(50, 100)
exp_prefix <- "cst"
for (gen in generation_cutoffs) {
  p <- ggplot(filter(cst_data, solution=="1" & update <= gen), aes(x=matchbin_metric, fill=matchbin_metric)) +
    geom_bar(stat="count", position="dodge") +
    geom_text(stat="count", 
            mapping=aes(label=..count..), 
            position=position_dodge(0.9), vjust=0) +
    ggtitle(paste("Changing signal task - solutions\n","<= ", gen, sep="")) +
    ylim(0, num_replicates+2) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/",out_dir,"/", exp_prefix, "-solutions-",gen,".png",sep=""), width=16, height=8)
  print(p)
}

Compare each metric’s successes using Fisher’s exact.

do_ft <- function(data, metric_a, metric_b, n) {
  a_successes <- 
    nrow(filter(data, matchbin_metric==metric_a & solution=="1"))
  b_successes <- 
    nrow(filter(data, matchbin_metric==metric_b & solution=="1"))
  table <-
    matrix(c(a_successes, 
             b_successes,
             n-a_successes,
             n-b_successes),
           nrow=2)
  rownames(table) <- c(metric_a, metric_b)
  colnames(table) <- c("success", "fail")
  ft <- fisher.test(table)
  return(ft)
}

metrics <- c("hamming", "hash", "streak", "integer", "integer-symmetric")
ft_results = list()
ft_p_values = list()
for (i in seq(1, length(metrics))) {
  for (k in seq(i, length(metrics))) {
    if (i == k) { next() }
    # print(paste("i = ", i, "; k = ", k, sep=""))
    metric_a <- metrics[i]
    metric_b <- metrics[k]
    comp_name <- paste(metric_a, "_vs_", metric_b, sep="")
    # print(comp_name)
    ft_results[[comp_name]] <- do_ft(cst_data, metric_a, metric_b, num_replicates)
    ft_p_values[[comp_name]] <- ft_results[[comp_name]]$p.value
  }
}

adjusted <- p.adjust(ft_p_values,
                     method="bonferroni")

for (key in names(adjusted)) {
  print(paste("Comparison: ", key, sep=""))
  adjusted_p <- adjusted[[key]]
  print(paste("  adjusted p value: ", adjusted_p, sep=""))
  if (adjusted_p < 0.05) { print("  *significant") }
}
## [1] "Comparison: hamming_vs_hash"
## [1] "  adjusted p value: 4.83259611792869e-11"
## [1] "  *significant"
## [1] "Comparison: hamming_vs_streak"
## [1] "  adjusted p value: 1"
## [1] "Comparison: hamming_vs_integer"
## [1] "  adjusted p value: 2.36747458507527e-38"
## [1] "  *significant"
## [1] "Comparison: hamming_vs_integer-symmetric"
## [1] "  adjusted p value: 6.82464555680076e-37"
## [1] "  *significant"
## [1] "Comparison: hash_vs_streak"
## [1] "  adjusted p value: 4.83259611792869e-11"
## [1] "  *significant"
## [1] "Comparison: hash_vs_integer"
## [1] "  adjusted p value: 4.03183617537378e-11"
## [1] "  *significant"
## [1] "Comparison: hash_vs_integer-symmetric"
## [1] "  adjusted p value: 3.0335335243374e-10"
## [1] "  *significant"
## [1] "Comparison: streak_vs_integer"
## [1] "  adjusted p value: 2.36747458507527e-38"
## [1] "  *significant"
## [1] "Comparison: streak_vs_integer-symmetric"
## [1] "  adjusted p value: 6.82464555680076e-37"
## [1] "  *significant"
## [1] "Comparison: integer_vs_integer-symmetric"
## [1] "  adjusted p value: 1"

Time to solution x metric

exp_prefix <- "cst"
p <- ggplot(cst_data, aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
    geom_boxplot() +
    geom_jitter() +
    ggtitle(paste("Changing signal task - time to solution", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/",out_dir,"/", exp_prefix, "-time-to-time-to-solution.png",sep=""), width=16, height=8)
print(p)

ggplot(cst_data, aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
    geom_jitter(alpha=0.75) +
    stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
    stat_summary(fun.y = mean, geom = "point", colour = "black") +
    ggtitle(paste("Changing signal task - time to solution", sep="")) +
    theme(axis.text.x=element_text(size=8))

metrics <- c("hamming", "hash", "integer", "integer-symmetric", "streak")
cst_update_rankings <- data.frame(matchbin_metric=character(), 
                                  score=numeric(), 
                                  update=numeric(),
                                  update_rank=numeric(),
                                  solution=numeric())
for (metric in metrics) {
  cst_metric <- 
    filter(cst_data, matchbin_metric==metric)
  cst_metric <- 
    subset.data.frame(cst_metric, select=c("score", 
                                           "update", 
                                           "matchbin_metric",
                                           "solution"))
  cst_metric$update <- as.numeric(cst_metric$update)
  cst_metric$update_ranking <- rank(cst_metric$update, ties.method="random")
  cst_update_rankings <- rbind(cst_update_rankings, cst_metric)
}

exp_prefix <- "cst"
ggplot(filter(cst_update_rankings, update_ranking <= 50), aes(x=matchbin_metric, y=update, fill=matchbin_metric)) +
    geom_jitter(aes(color=matchbin_metric)) +
    geom_boxplot(alpha=0.75) +
    ggtitle(paste("Changing signal task - time to solution (first 50)", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/", out_dir,"/", exp_prefix, "-time-to-solution-top50-bp.png", sep=""))
## Saving 7 x 5 in image

ggplot(filter(cst_update_rankings, update_ranking <= 50), aes(x=matchbin_metric, y=update, color=matchbin_metric)) +
    geom_jitter(alpha=0.75) +
    stat_summary(fun.data=mean_cl_boot, fun.args=list(conf.int=0.95), geom="errorbar", color="black") +
    stat_summary(fun.y = mean, geom = "point", colour = "black") +
    ggtitle(paste("Changing signal task - time to solution", sep="")) +
    theme(axis.text.x=element_text(size=8)) +
    ggsave(paste("./imgs/", out_dir,"/", exp_prefix, "-time-to-solution-top50-ci.png", sep=""))
## Saving 7 x 5 in image

cst_first_50_solutions <- filter(cst_update_rankings, update_ranking <= 50)
kruskal.test(formula = update ~ matchbin_metric, data=cst_first_50_solutions)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  update by matchbin_metric
## Kruskal-Wallis chi-squared = 100.44, df = 4, p-value < 2.2e-16
pairwise.wilcox.test(x=cst_first_50_solutions$update,
                     g=cst_first_50_solutions$matchbin_metric,
                     p.adjust.method = "bonferroni",
                     exact=FALSE,
                     conf.int=TRUE)
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  cst_first_50_solutions$update and cst_first_50_solutions$matchbin_metric 
## 
##                   hamming integer integer-symmetric hash   
## integer           2.1e-10 -       -                 -      
## integer-symmetric 3.7e-08 0.19    -                 -      
## hash              3.2e-09 0.12    1.00              -      
## streak            1.00    1.4e-10 2.9e-08           2.2e-09
## 
## P value adjustment method: bonferroni

Fitness over time x metric

updates <- c(10, 30, 50, 100, 300, 500, 1000, 3000, 5000)
plot_data <- filter(cst_fot_data, update %in% updates)
plot_data$update <- factor(plot_data$update)

ggplot(plot_data, aes(x=update, y=score, fill=matchbin_metric)) +
  geom_boxplot() +
  ggtitle("CST - score over time") +
  ggsave(paste("./imgs/",out_dir,"/cst_score_over_time_box.pdf", sep=""), width=21, height=8)

ggplot(filter(cst_fot_data), aes(x=update, y=score, color=matchbin_metric, fill=matchbin_metric)) +
  stat_summary(geom = "line", fun.y = mean) +
  stat_summary(geom = "ribbon", fun.data = mean_cl_boot, fun.args=list(conf.int=0.95), alpha = 0.3, color=NA) +
  ggtitle("CST - score over time") +
  ggsave(paste("./imgs/",out_dir,"/cst_score_over_time.pdf", sep=""), width=21, height=10)